# -*- encoding: utf-8 -*-
# @Time       :  1:35
# @Author     : yuxian
# @Email      : 1503889663@qq.com
# @File       : 正则文本清洗.py
# @SoftWare   : PyCharm
import re


def clean_cleaned_text(cleaned_text: str) -> str:
    """Normalize free text through a fixed sequence of regex rewrites.

    The rewrites are applied in order, and later rules see the output of
    earlier ones:

    1. ``*``, ``(s)`` and ``'s`` are each replaced by a single space.
    2. ``/`` becomes `` per `` in ``day(s)/week`` and ``<digits>/year(s)``.
    3. Whitespace around any remaining ``/`` is removed, then ``/`` between
       letter/letter, letter/digit or digit/letter runs becomes `` or ``.
    4. Dollar ranges (``$a-$b``, ``$a to $b``, ``between $a and $b``) are
       compacted, and every ``$`` is spelled out as ``dollar``.
    5. ``401(k)`` becomes ``401k``; ``#...``/``@...`` tokens (up to and
       including the next whitespace), ``[ link removed ]``, ``(<digits>)``
       and whitespace-surrounded hyphens are replaced by a single space.
    6. ``San Francisco, CA`` is expanded to ``San Francisco, California``.

    Args:
        cleaned_text: The text to normalize.

    Returns:
        The normalized text.
    """

    def slash_to_per(match):
        # re.sub callback: "days/week" -> "days per week".
        return match.group(0).replace("/", " per ")

    def slash_to_or(match):
        # re.sub callback: "and/or" -> "and or or".
        return match.group(0).replace("/", " or ")

    def compact_dollar_range(match):
        # re.sub callback: drop "between", turn "and" into a hyphen, strip
        # thousands separators and spaces, spell out "$", then pad the hyphen.
        span = match.group(0).replace("between", "").replace("and", "-")
        span = span.replace(",", "").replace(" ", "")
        return span.replace("$", "dollar").replace("-", " - ")

    # Remove `*`, `(s)` and `'s`.
    cleaned_text = re.sub(r"[*]|\(s\)|'s", " ", cleaned_text)

    # Slash handling: rates first, then generic alternatives.
    cleaned_text = re.sub(r"day(s)?/week|\d+(\s)?/year(s)?", slash_to_per, cleaned_text)
    cleaned_text = re.sub(r"\s+/\s+", "/", cleaned_text)
    cleaned_text = re.sub(r"[A-Za-z]+/[A-Za-z]+", slash_to_or, cleaned_text)
    cleaned_text = re.sub(r"[A-Za-z]+/\d+", slash_to_or, cleaned_text)
    cleaned_text = re.sub(r"\d+/[A-Za-z]+", slash_to_or, cleaned_text)

    # US dollar amounts and ranges.
    cleaned_text = re.sub(
        r"\$\d+(,)?\d+(\s)?-(\s)?\$\d+(,)?\d+", compact_dollar_range, cleaned_text
    )
    cleaned_text = re.sub(
        r"\$\d+(,)?\d+(\s)?to(\s)?\$\d+(,)?\d+", compact_dollar_range, cleaned_text
    )
    cleaned_text = re.sub(
        r"between\s+\$\d+(,)?\d+(\s+)?and(\s+)?\$\d+(,)?\d+",
        compact_dollar_range,
        cleaned_text,
    )
    cleaned_text = re.sub(r"\$", "dollar", cleaned_text)

    # Other special cases.
    cleaned_text = re.sub(r"401\(k\)", "401k", cleaned_text)
    # NOTE: the trailing `\s+-\s+` alternative also collapses the " - "
    # separator that compact_dollar_range inserted above.
    cleaned_text = re.sub(
        r"#.*?\s|@.*?\s|\[\s+[Ll]ink\s+removed\s+]|\(\d+\)|\s+-\s+",
        " ",
        cleaned_text,
    )
    # The input string must be passed here; omitting it makes re.sub raise
    # TypeError on every call.
    cleaned_text = re.sub(
        r"San Francisco(,)\s+CA", "San Francisco, California", cleaned_text
    )
    return cleaned_text


if __name__ == '__main__':
    # Manual demo: run the cleaner over one sample job-posting text and
    # print the result to stdout.
    sample_posting = """Job Requisition ID # . predisposition OverviewAutodesk's Customer Experience (CX) Analytics team is looking for a 
passionate and driven Data Analyst with proven experience driving organizational change through rigorous data 
analysis. This role will perform deep data mining and statistical behavioral analysis of Autodesk's customer data 
and will partner with internal business stakeholders to refine key success measures and find insights to drive 
program efficiency. They will play a significant role in shaping a customer-focused and data-driven culture. Join a
dynamic team that is helping to transform business decisions and processes with actionable customer insights gained
from meaningful research, analysis and measurement of the Autodesk customer experience. Job Title:Lead Business 
Intelligence AnalystLocation: San Francisco, Irresponsibility collaboratively and cross functionally to define and 
meet stakeholder requirements Translate business objectives into technical data requirements and balance them with 
technical feasibility, recommending changes in development, maintenance and platform standards as necessary Perform
deep dive analyses to understand trends, anomalies and insights that will drive operational improvements Craft data
stories through presentations, written summaries, and data visualizations that accurately outline problem 
statements and provide actionable and unbiased intelligence and recommendations Communicate findings from 
initiatives with clarity and accountability to the broader organization/stakeholdersClearly document provenance of 
data, ETL logic, and code used to develop models Proactively identify areas in which analytics efforts can answer 
business questions, drive operational improvements and business value Spearhead the development of insight-driven 
tools & dashboards and provide ongoing support regarding functionality and 
data-integrityMinimumRequirementsProficiency using SQL to query large proficiency with Google Analytics and/or 
Adobe Analyticalally experience with Microsoft Excel (pivot tables, advanced modeling, creating charts/graphs) and 
PowerPoint with BI development and database systems such as PowerBI, Looker and Tableau Experience with scripting 
languages such as R, Python or equivalent Experience with Qubole, Snowflake or equivalent The Ideal Candidate 
Extensive experience in roles combining data analysis/business intelligence, research and strategy Strong 
problem-solving skills and sharp business judgmentData-curious, interested in telling the story behind the 
dataDetail-oriented, ensuring data accuracy and consistency Exceptional communication skills, attentive listener 
and compelling influence to new ideas and respectful of differing opinions/perspectivesAddresses difficult problems
head-on and challenges the status quo Can adapt to change and is open to learning new skillsSelf-motivated and can 
work independently in a fast-paced environment Highly collaborative and can work cross-functionally, while 
cultivating relationships with colleagues and stakeholders Passionate about improving the customer experience Click
below to learn more about our benefits in the US.At Autodesk, we're building a diverse workplace and an inclusive 
culture to give more people the chance to imagine, design, and make a better world. Autodesk is proud to be an 
equal opportunity employer and considers all qualified applicants for employment without regard to race, color, 
religion, age, sex, sexual orientation, gender, gender identity, national origin, disability, veteran status or any
other legally protected characteristic. We also consider for employment all qualified applicants regardless of 
criminal histories, consistent with applicable law. Are you an existing contractor or consultant with Autodesk? . 
Please search for open jobs and apply internally (not on this external site). If you have any questions or require 
support, contact Autodesk Careers. Salary is one part of Autodesk's competitive package. For U.S.-based roles, we 
expect a starting base salary between $109,500 and $187,770. Offers are based on the candidate's experience and 
geographic location, and may exceed this range. In addition to base salaries, we also have a significant emphasis 
on annual cash bonuses, commissions for sales roles, stock grants, and a comprehensive benefits package. 
SummaryLocation:San Francisco, CA, USAType: Full timescale ."""
    cleaned_posting = clean_cleaned_text(sample_posting)
    print(cleaned_posting)
    
